# -*- coding: utf-8 -*-
import scrapy
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings
from xiecheng.items import XiechengItem


class XiechengSpiderSpider(scrapy.Spider):
    """Spider that extracts grouped links from a hotels.ctrip.com page.

    The start page is expected to contain ``<dt>`` headings, each followed
    by a ``<dd>`` holding ``<a>`` links. (Presumably the headings are
    alphabetical initials and the links are cities -- inferred from the
    URL and field names; confirm against the live page.)
    """

    name = 'xiecheng'
    allowed_domains = ['hotels.ctrip.com']
    start_urls = ['https://hotels.ctrip.com/domestic-city-hotel.html/']

    def parse(self, response):
        """Yield one ``XiechengItem`` per link under each ``<dt>`` heading.

        Args:
            response: The downloaded page for one of ``start_urls``.

        Yields:
            XiechengItem with the fields:
                Initials: text of the ``<dt>`` heading the link belongs to.
                emtitle:  the link's ``title`` attribute.
                emname:   the link's text.
                emhref:   the link's ``href`` attribute.
            Any field is ``None`` when the corresponding node is missing.
        """
        for heading in response.xpath("//dt"):
            initials = heading.xpath("text()").extract_first()
            # Only the first <dd> sibling after the heading is considered.
            for link in heading.xpath("following-sibling::dd[1]/a"):
                # Build a fresh item for every link. Re-yielding a single
                # shared instance would let later iterations overwrite the
                # fields of items that pipelines or exporters still hold.
                xiecheng_item = XiechengItem()
                xiecheng_item['Initials'] = initials
                xiecheng_item['emtitle'] = link.xpath("@title").extract_first()
                xiecheng_item['emname'] = link.xpath("text()").extract_first()
                xiecheng_item['emhref'] = link.xpath("@href").extract_first()

                yield xiecheng_item


if __name__ == '__main__':
    # Allow launching the crawl by running this file directly, using the
    # settings of the enclosing Scrapy project.
    settings = get_project_settings()
    process = CrawlerProcess(settings)
    # The spider is referenced by its ``name`` attribute.
    process.crawl('xiecheng')
    process.start()
